<?php
// Stop immediately unless the BASEPATH constant has already been defined by
// whatever bootstrapped this request; the file is not meant to run on its own.
defined ( 'BASEPATH' ) or exit ( 'No direct script access allowed' );
/**
 * Crawler controller: reads the manga listing page of truyen.vnsharing.net,
 * visits every manga detail page it finds, stores one row per manga through
 * manga_model and echoes an HTML table reporting what was scraped.
 *
 * Methods whose name starts with an underscore are helpers of this crawler.
 */
class Crawlermanga extends MX_Controller {
	
	/**
	 * Initialises the parent controller, then loads the model used to
	 * empty the catalogue table.
	 */
	function __construct() {
		// The parent constructor has to run before $this->load is touched so
		// the loader used below belongs to a fully initialised controller.
		parent::__construct ();
		$this->load->model ( 'manga_model' );
	}
	
	/**
	 * Runs the whole crawl and prints the report table.
	 *
	 * The catalogue table is emptied only after the listing page produced at
	 * least one manga link, so a failed download does not wipe existing data.
	 *
	 * @return void
	 */
	function index() {
		ini_set('max_execution_time', 24000000);
		$links = $this->_extract_manga_links ( 'http://truyen.vnsharing.net/DanhSach' );
		// The extractor returns an empty array (or only one of the keys) when
		// nothing matched, so both keys are read defensively.
		$manga = isset($links['manga']) ? $links['manga'] : array ();
		$groups = isset($links['group']) ? $links['group'] : array ();
		if (!empty($manga)) {
			$this->manga_model->query ( 'TRUNCATE TABLE catalog_manga' );
		}
		?>
		<table width="100%" border="">
		<thead>
			<tr>
				<th>Name</th>
				<th>Code</th>
				<th>Image</th>
				<th>Description</th>
				<th>Author</th>
				<th>Genres</th>
				<th>Group</th>
				<th>Status</th>
				<th>Crawler Url</th>
			</tr>
		</thead>
		<tbody>
		<?php
		// NOTE(review): manga and group links are paired purely by position in
		// the listing page; confirm the page really lists one group per manga.
		foreach ( $manga as $i => $link ) {
			$group = isset($groups[$i]) ? $groups[$i] : 'No Author';
			$this->_extract_manga_infor ( $link, $group );
		}
		?>
		</tbody>
		</table>
		<?php 
		// Report the number of manga processed (not the number of keys in $links).
		echo('<h4>'.count($manga).'</h4>');
	}
	
	/**
	 * Downloads a listing page and collects the manga and group URLs in it.
	 *
	 * @param string $url Address of the listing page.
	 * @return array Empty when the page could not be read or contains no URL;
	 *               otherwise may contain 'manga' (URLs with '/Truyen/' and
	 *               exactly four slashes) and/or 'group' (URLs with '/Nhom/').
	 *               A key is absent when no URL of that kind was found.
	 */
	function _extract_manga_links($url) {
		$data = file_get_contents ( $url );
		if ($data === false) {
			return array ();
		}
		// Turn root-relative links into absolute ones so the URL regex sees them.
		$data = str_replace ( '<a href="/', '<a href="http://truyen.vnsharing.net/', $data );
		if (!preg_match_all ( '/((?:http|https):\/\/(?:www\.)*(?:[a-zA-Z0-9_\-]{1,15}\.+[a-zA-Z0-9_]{1,}){1,}(?:[a-zA-Z0-9_\/\.\-\?\&\:\%\,\!\;]*))/', $data, $manga_urls )) {
			return array ();
		}
		$filter_level0 = array ();
		foreach ( $manga_urls [0] as $_manga ) {
			// Four slashes = scheme (2) + '/Truyen' + '/<slug>', i.e. a manga
			// page itself and not something nested below it.
			if (stripos ( $_manga, '/Truyen/' ) > 0 && substr_count ( $_manga, '/' ) === 4) {
				$filter_level0['manga'][] = $_manga;
			}
			if (stripos ( $_manga, '/Nhom/' ) > 0) {
				$filter_level0['group'][] = $_manga;
			}
		}
		return $filter_level0;
	}
	
	/**
	 * Scrapes one manga page, prints a report row for it and saves it.
	 *
	 * Fields that cannot be found on the page are stored as the placeholder
	 * 'CAN NOT CRAWLER' (image and status fall back to an empty string).
	 *
	 * @param string $link  URL of the manga page; the part after '/Truyen/'
	 *                      is used as the manga name.
	 * @param string $group Group URL (the part after '/Nhom/' is displayed)
	 *                      or a plain label such as 'No Author'.
	 * @return array The field => value map handed to manga_model.
	 */
	function _extract_manga_infor($link, $group) {
		$name = trim(str_replace('-', ' ', $this->_after_marker($link, '/Truyen/')));
		$group = trim(str_replace('-', ' ', $this->_after_marker($group, '/Nhom/')));
		$code = strtolower(str_replace(' ', '-', $name));
		$content = file_get_contents ( $link );
		if ($content === false) {
			// Keep going with an empty page so the row is still recorded with
			// the placeholder values below.
			$content = '';
		}
		preg_match('/\<span class=\"info\"\>Sơ lược:\<\/span\>[\s\t]*(.+)/', $content, $desc);
		preg_match('/\<a href\=\"\/TheLoai.*?\".*?\>(.*)\<\/a\>\s/', $content, $genres);
		preg_match('/\<a href\=\"\/TacGia.*?\".*?\>(.*)\<\/a\>/', $content, $author);
		preg_match('/\<img width\=\"190px\" height\=\"250px\" src\=\"(.*?)\" \/\>/', $content, $image);
		preg_match('/\<span class\=\"info\"\>Tình trạng:\<\/span\>(&nbsp;)*(.*)(&nbsp;)*/', $content, $status);
		
		// Reduce every match to plain text, falling back when nothing matched.
		$summary = strip_tags(isset($desc[1]) ? $desc[1] : 'CAN NOT CRAWLER');
		$author_name = strip_tags(isset($author[1]) ? $author[1] : 'CAN NOT CRAWLER');
		$genre_names = strip_tags(isset($genres[1]) ? $genres[1] : 'CAN NOT CRAWLER');
		$image_url = isset($image[1]) ? $image[1] : '';
		$status_text = strip_tags(isset($status[2]) ? $status[2] : '');
		
		// Each value goes through addslashes() exactly once; composite fields
		// are built from the raw values so they are not escaped twice.
		// NOTE(review): if manga_model escapes values itself, addslashes()
		// should be dropped here altogether - confirm against the model.
		$data = array ();
		$data['name'] = addslashes($name);
		$data['code'] = addslashes($code);
		$data['category_ids'] = addslashes($genre_names);
		$data['alternate_name'] = addslashes($name);
		$data['manga_type_id'] = '1';
		$data['national_type_id'] = '1';
		$data['user_id'] = '1';
		$data['author'] = addslashes($author_name);
		$data['artist'] = addslashes($author_name);
		$data['summary'] = addslashes($summary);
		$data['description'] = addslashes($summary);
		$data['meta_title'] = addslashes('Manga ' . $name);
		$data['meta_keywords'] = addslashes('comet,manga,' . $name . ',' . $code . ',' . $author_name);
		$data['meta_desc'] = addslashes($summary);
		$data['created_at'] = 'function:NOW()';
		$data['updated_at'] = 'function:NOW()';
		?>
		<tr>
			<td valign="top"><?php echo $this->_escape($name) ?></td>
			<td valign="top"><?php echo $this->_escape($code) ?></td>
			<td valign="top">
				<img width="190px" height="250px" src="<?php echo $this->_escape($image_url) ?>" />
			</td>
			<td valign="top"><?php echo $this->_escape($summary) ?></td>
			<td valign="top"><?php echo $this->_escape($author_name) ?></td>
			<td valign="top"><?php echo $this->_escape($genre_names) ?></td>
			<td valign="top"><?php echo $this->_escape($group) ?></td>
			<td valign="top"><?php echo $this->_escape($status_text) ?></td>
			<td valign="top"><a href="<?php echo $this->_escape($link) ?>" target="_blank"><?php echo $this->_escape($link) ?></a></td>
		</tr>
		<?php
		$model = new manga_model();
		$model->setEntireData($data);
		$model->save();
		return $data;
	}
	
	/**
	 * Returns the part of $value that follows the first $marker, or $value
	 * unchanged when the marker does not occur (so plain labels such as
	 * 'No Author' are not truncated).
	 */
	private function _after_marker($value, $marker) {
		$position = strpos($value, $marker);
		if ($position === false) {
			return $value;
		}
		return substr($value, $position + strlen($marker));
	}
	
	/**
	 * Escapes scraped text for use in HTML text or a quoted attribute.
	 * Entities already present in the scraped markup are not encoded again.
	 */
	private function _escape($value) {
		$flags = ENT_QUOTES;
		if (defined('ENT_SUBSTITUTE')) {
			// Replace invalid byte sequences instead of returning an empty string.
			$flags |= ENT_SUBSTITUTE;
		}
		return htmlspecialchars((string) $value, $flags, 'UTF-8', false);
	}
}
?>
